pacman::p_load(tidyverse, data.table, broom, readxl)
rm(list=ls())
`%!in%` = Negate(`%in%`)
################################################################################

#Spatial units
spatial_id_a = 'county_id'
spatial_id_b = 'amc_id'

#Bilateral data specification
share_type = c('share_ij_tonnes') 
exporter_type = c('exporter_group') 
destination_type = 'destination_bloc' 
id_type_a = c('county_known')
id_type_b = c('county_known')
crop_list = c('soybean','maize')
year_max = 2018

##################### Land shares

for (country in c('argentina','brazil') ){
  
  df_l = fread(paste0('../../data/landuse/clean/landuse_',country,'_county.csv.gz'))
  df_u = fread(paste0('../../data/landuse/clean/geographicunits_',country,'.csv.gz'))

  if (country=='argentina'){
    
    #Spatial unit crosswalk
    df_u = df_u[, list(county_id, state_id, region, land_area_km2)]
    
    #Land
    df = merge(df_l, df_u, by='county_id')
    df = df[, c('forest','pasture') := list(forest_natural+forest_planted,pasture + forage_seasonal+forage_permanent)]
    setnames(df, old = c(spatial_id_a),  new = c('spatial_id'))
    df = df[,  list(year, region, state_id, spatial_id,pasture, forest, soybean, maize)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, region, state_id, spatial_id)][, unit:='hectares']
    setnames(df, old=c('soybean','maize','pasture'), new=c('soybean_ha','maize_ha','pasture_ha'))
    df[, c('agriculture_ha','nature_ha'):=list(soybean_ha+maize_ha+pasture_ha,forest)]
    df[, c('soybean','maize','pasture','share_agriculture'):=list(soybean_ha/agriculture_ha, maize_ha/agriculture_ha, pasture_ha/agriculture_ha, agriculture_ha/(agriculture_ha+nature_ha) )]
    df = df[,list(year, country, region, state_id, spatial_id, soybean, maize, pasture, share_agriculture)][is.finite(share_agriculture),]
    df_commodity = melt(df[,list(year, spatial_id, soybean, maize, pasture)], id.vars=c("year","spatial_id"), variable.name="commodity", value.name="share_commodity")
    df_nests = df[,list(year, country, region, state_id, spatial_id, share_agriculture)]
    df_land = merge(df_nests,df_commodity, by=c('year','spatial_id'))
        
    #Farm-gate commodity prices
    df = fread(paste0('../../data/prices/clean/farmgate_crops_argentina_',spatial_id_a,'.csv.gz'))
    df=df[year!=2018,]
    df[year==2017,]$year=2018
    df_pc = df[commodity %in% crop_list, list(year, region, state_id, spatial_id, commodity, price)][, lapply(.SD, mean, na.rm=TRUE), by=list(year, region, state_id, spatial_id, commodity)][, unit:='current USD per tonne']
    df = fread(paste0('../../data/prices/clean/farmgate_cattle_argentina_',spatial_id_a,'.csv.gz'))[, commodity:='pasture']
    setnames(df, old = c('value'),  new = c('price'))
    df_pp = df[establishment_type %in% c('all establishments') & value_type %in% c('price_per_tn'), list(year, region, state_id, spatial_id, commodity, price)][, lapply(.SD, mean, na.rm=TRUE), by=list(year, region, state_id, spatial_id, commodity)][, unit:='current USD per tonne']
    df_prices = rbind(df_pc, df_pp)
    
    df_ar = merge(df_land, df_prices[, list(year,spatial_id,commodity,price)], by=c('year','spatial_id','commodity'), all.x=T)
    df_ar$country = 'ARGENTINA'
    
  }else{
    
    #Spatial unit crosswalk
    df_u = df_u[, list(county_id, amc_id, microregion_id, mesoregion_id, state_id, region, land_area_km2)]
    
    #Land
    df = merge(df_l, df_u, by='county_id')
    df = df[, c('pasture','forest') := list(pasture_natural+pasture_planted,forest_natural+forest_planted)]
    setnames(df, old = c(spatial_id_b),  new = c('spatial_id'))
    df = df[,  list(year, region, state_id, spatial_id, pasture, forest, soybean, maize)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, region, state_id, spatial_id)][, unit:='hectares']
    setnames(df, old=c('soybean','maize','pasture'), new=c('soybean_ha','maize_ha','pasture_ha'))
    df[, c('agriculture_ha','nature_ha'):=list(soybean_ha+maize_ha+pasture_ha,forest)]
    df[, c('soybean','maize','pasture','share_agriculture'):=list(soybean_ha/agriculture_ha, maize_ha/agriculture_ha, pasture_ha/agriculture_ha, agriculture_ha/(agriculture_ha+nature_ha) )]
    df = df[,list(year, country, region, state_id, spatial_id, soybean, maize, pasture, share_agriculture)][is.finite(share_agriculture),]
    df_commodity = melt(df[,list(year, spatial_id, soybean, maize, pasture)], id.vars=c("year","spatial_id"), variable.name="commodity", value.name="share_commodity")
    df_nests = df[,list(year, country, region, state_id, spatial_id, share_agriculture)]
    df_land = merge(df_nests,df_commodity, by=c('year','spatial_id'))
    
    #Farm-gate commodity prices
    df = fread(paste0('../../data/prices/clean/farmgate_crops_brazil_',spatial_id_b,'.csv.gz'))
    df_pc = df[commodity %in% crop_list, list(year, region, state_id, spatial_id, commodity, price)][, lapply(.SD, mean, na.rm=TRUE), by=list(year, region, state_id, spatial_id, commodity)][, unit:='current USD per tonne']
    df = fread(paste0('../../data/prices/clean/farmgate_cattle_brazil_',spatial_id_b,'.csv.gz'))[, commodity:='pasture']
    setnames(df, old = c('value'),  new = c('price'))
    df_pp = df[establishment_type %in% c('cattle establishments')  & value_type %in% c('breeders_p','calves_p'), list(year, region, state_id, spatial_id, commodity, price)][, lapply(.SD, mean, na.rm=TRUE), by=list(year, region, state_id, spatial_id, commodity)][, unit:='current USD per head']
    beef_tn_per_head = 0.25
    df_pp$price = df_pp$price/beef_tn_per_head
    df_prices = rbind(df_pc, df_pp)
    
    df_br = merge(df_land, df_prices[, list(year,spatial_id,commodity,price)], by=c('year','spatial_id','commodity'), all.x=T)
    df_br$country = 'BRAZIL'
    
  }
}

df_land = rbind(df_ar,df_br)
setnames(df_land, old=c('price'), new=c('price_i'))



##################### Output elasticities

df = fread('inputs/parameters_supply.csv')[,list(spatial_id, theta_lambda, theta, frontier)]
df_factors = setDT(read_excel('../../data/factors/raw/factor_shares.xlsx'))[,c('commodity','land_share')]
df_params = df[, lapply(.SD, mean, na.rm=TRUE), by=list(spatial_id)]
df = merge(df_land,df_params,  by=c('spatial_id'))
df = merge(df,df_factors, by=c('commodity'))
df[, c('elasticity_PQ_i'):=list(  (  ( (theta_lambda-1)*(1-share_commodity) + (theta-1)*share_commodity*(1-share_agriculture)+1-land_share)/land_share )^(-1) ) ]
df[, c('elasticity_QP_i'):=list( 1/elasticity_PQ_i ) ] 
df[commodity=='pasture',]$commodity = 'beef'
df_elasticity = df


##################### Market shares

for (country in c('argentina','brazil') ){
  
  if (country=='argentina'){
  
    df = fread(paste0('../../data/agribusiness/clean/soybean_',country,'.csv.gz'))[id_type %in% id_type_a & state %!in% c('UNKNOWN','IMPORTS + STOCK') & destination_bloc != 'UNKNOWN',]
    setnames(df, old = c(spatial_id_a,exporter_type,destination_type),  new = c('spatial_id','firm','destination_unit'))
    
    #Bilateral shares and prices
    df_n_ij = df[, list(year, spatial_id, destination_unit, commodity, product_type, firm)][ , .(N_ij = length(unique(firm))), by=list(year, spatial_id, destination_unit, commodity, product_type)]
    df_n_i = df[, list(year, spatial_id, commodity, product_type, firm)][ , .(N_i = length(unique(firm))), by=list(year, spatial_id, commodity, product_type)]
    df_n = merge(df_n_ij,df_n_i, by=c('year','spatial_id','commodity','product_type'), all.x=T)
    
    df_q_ij = df[, list(year, country, region, state_id, spatial_id, destination_unit, equivalent_tonnes, fob_usd, commodity, product_type)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, destination_unit, commodity, product_type)]
    df_q_i = df[, list(year, country, region, state_id, spatial_id, equivalent_tonnes, fob_usd, commodity, product_type)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, commodity, product_type)]
    df_s = merge(df_q_ij,df_q_i, by=c('year','country','region','state_id','spatial_id','commodity','product_type'))[, c('share_ij_tonnes','share_ij_usd','price_ij','q_ij','value_ij') := list(equivalent_tonnes.x/equivalent_tonnes.y,fob_usd.x/fob_usd.y,fob_usd.x/equivalent_tonnes.x, equivalent_tonnes.x,fob_usd.x)][, list(year, country, region, state_id, spatial_id, destination_unit, share_ij_tonnes, share_ij_usd, price_ij, q_ij, value_ij, commodity, product_type)]
    
    df_a_t = merge(df_s,df_n, by=c('year','spatial_id','destination_unit','commodity','product_type'))
    df_a = df_a_t[year<=year_max,][, c("year") := NULL][, lapply(.SD, mean, na.rm=TRUE),  by=list(country, region, state_id, spatial_id, destination_unit, commodity, product_type)]
    
  }else{
    
    n_c=1
    for (commodity in c('soybean','maize','beef')) {
      
      df = fread(paste0('../../data/agribusiness/clean/',commodity,'_',country,'.csv.gz'))[id_type %in% id_type_b & state !='UNKNOWN STATE'  & destination != 'BRAZIL' & destination_bloc != 'UNKNOWN',]
      if(commodity=='beef'){df$port = df$hub}
      setnames(df, old = c(spatial_id_b,exporter_type,destination_type),  new = c('spatial_id','firm','destination_unit'))
      
      df_n_ij = df[, list(year, spatial_id, destination_unit, commodity, product_type, firm)][ , .(N_ij = length(unique(firm))), by=list(year, spatial_id, destination_unit, commodity, product_type)]
      df_n_i = df[, list(year, spatial_id, commodity, product_type, firm)][ , .(N_i = length(unique(firm))), by=list(year, spatial_id, commodity, product_type)]
      df_n = merge(df_n_ij,df_n_i, by=c('year','spatial_id','commodity','product_type'), all.x=T)
      
      df_q_ij = df[, list(year, country, region, state_id, spatial_id, destination_unit, equivalent_tonnes, fob_usd, commodity, product_type)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, destination_unit, commodity, product_type)]
      df_q_i = df[, list(year, country, region, state_id, spatial_id,equivalent_tonnes,fob_usd, commodity, product_type)][, lapply(.SD, sum, na.rm=TRUE), by=list(year, country, region, state_id, spatial_id, commodity, product_type)]
      df_s = merge(df_q_ij,df_q_i, by=c('year','country','region','state_id','spatial_id','commodity','product_type'))[, c('share_ij_tonnes','share_ij_usd','price_ij','q_ij','value_ij') := list(equivalent_tonnes.x/equivalent_tonnes.y,fob_usd.x/fob_usd.y,fob_usd.x/equivalent_tonnes.x, equivalent_tonnes.x,fob_usd.x)][, list(year, country, region, state_id, spatial_id, destination_unit, share_ij_tonnes, share_ij_usd, price_ij, q_ij, value_ij, commodity, product_type)]

      df_b_t_c = merge(df_s,df_n, by=c('year','spatial_id','destination_unit','commodity', 'product_type'))
      df_b_c = df_b_t_c[year<=year_max,][, c("year") := NULL][, lapply(.SD, mean, na.rm=TRUE),  by=list(country, region, state_id, spatial_id, destination_unit, commodity, product_type)]
      
      if(n_c==1){
        df_b_t = df_b_t_c
        df_b = df_b_c
      }else{
        df_b_t = rbind(df_b_t,df_b_t_c)
        df_b = rbind(df_b,df_b_c)
      }
      n_c=n_c+1
    } 
  }
}

df_shares = rbind(df_a_t,df_b_t)


##################### Price gaps, markdowns, and trade costs

df = merge(df_shares, df_elasticity, by=c('commodity','year','country','region','state_id','spatial_id'))
setnames(df, old = c(share_type),  new = c('share_ij'))
df=df[price_ij>0 & price_i>0,]
df$mu_i = (1+df$elasticity_PQ_i/df$N_i)^(-1)
df[N_i==0,]$mu_i=NA
df$mu_ij = (1+df$elasticity_PQ_i*df$share_ij/df$N_ij)^(-1)
df[N_ij==0,]$mu_ij=NA
df$N_i = as.numeric(df$N_i)
df$N_ij = as.numeric(df$N_ij)

df$price_ratio_ij = df$price_ij/df$price_i
max_var = quantile(df$price_ratio_ij,0.975)
min_var = quantile(df$price_ratio_ij,0.025)
df = df[price_ratio_ij>min_var & price_ratio_ij<max_var,]
df$price_ratio_ij = df$price_ratio_ij + (1-min(df$price_ratio_ij))

df[, c('v_pc','v_ic'):= list( (price_ratio_ij)^(-1) , (price_ratio_ij*mu_i)^(-1) ) ]
write.csv(df, gzfile(paste0("inputs/data_tradecosts_",destination_type,".csv.gz")), row.names = FALSE)

